import random

import scrapy
# from util.headers import get_header

# Default request headers shared by the spider's requests; only a desktop
# Chrome user-agent string is set.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/141.0.0.0 Safari/537.36'
    ),
}


class CcgpSpider(scrapy.Spider):
    """Spider for the contract publication pages on htgs.ccgp.gov.cn.

    Request chain:
      1. ``start``        - fetch a code from the ``genCodeImg`` endpoint.
      2. ``parse``        - post that code to the listing endpoint for 20 pages.
      3. ``parse_index``  - follow every listed row to its detail page.
      4. ``parse_detail`` - print the download URL of each linked PDF together
         with the page title.
    """

    name = "ccgp"

    async def start(self):
        """Yield the initial request to the ``genCodeImg`` endpoint."""
        # A random ``t`` query value is appended so each run uses a distinct URL.
        url = f'http://htgs.ccgp.gov.cn/GS8/genCodeImg?t={random.random()}'
        yield scrapy.Request(url=url, callback=self.parse, headers=headers)

    async def parse(self, response):
        """Read the code from the JSON response and request the listing pages.

        The ``msg`` field of the response is sent back as ``codeResult`` with
        every listing request. If the field is missing, an error is logged and
        no listing request is made.
        """
        code = response.json().get('msg')
        if code is None:
            self.logger.error('No "msg" field in code response from %s', response.url)
            return

        url = 'http://htgs.ccgp.gov.cn/GS8/contractpublish/getContractByAjax?contractSign=0'
        # NOTE(review): pages are requested as 0..19 - confirm whether the
        # endpoint numbers pages from 0 or from 1.
        for page in range(20):
            yield scrapy.FormRequest(
                url,
                formdata={
                    'code': 'pageNoChange',
                    'currentPage': f'{page}',
                    'codeResult': code,
                },
                headers=headers,
                callback=self.parse_index,
            )

    async def parse_index(self, response):
        """Yield a detail-page request for every row of a listing response."""
        rows = response.json().get('rows') or []
        if not rows:
            self.logger.warning('Listing response contained no rows: %s', response.url)

        for row in rows:
            uuid = row.get('uuid')
            if not uuid:
                # Without a uuid no detail URL can be built for this row.
                continue
            detail_url = f'http://htgs.ccgp.gov.cn/GS8/contractpublish/detail/{uuid}?contractSign=0'
            # Use the module-level headers like every other request here; the
            # previously referenced get_header() helper is not imported.
            yield scrapy.Request(detail_url, headers=headers, callback=self.parse_detail)

    @staticmethod
    def _pdf_uuid_from_onclick(onclick):
        """Return the first single-quoted argument of *onclick*, or ``None``."""
        parts = onclick.split("'")
        return parts[1] if len(parts) > 1 else None

    async def parse_detail(self, response):
        """Print the PDF download URL and page title for each PDF link.

        PDF links are the ``<a href="#">`` elements; the PDF uuid is taken from
        the first single-quoted argument of their ``onclick`` attribute.
        """
        onclick_values = response.xpath('//a[@href="#"]/@onclick').getall()
        # ``.get()`` returns None when the page has no <h2>; fall back to ''.
        title = (response.xpath('//h2/text()').get() or '').strip()

        for onclick in onclick_values:
            pdf_uuid = self._pdf_uuid_from_onclick(onclick)
            if pdf_uuid is None:
                # onclick handler without a quoted argument: nothing to download.
                continue
            pdf_url = f'https://download.ccgp.gov.cn/oss/download?uuid={pdf_uuid}'
            print(pdf_url, title)

        # TODO: the page body text (``//p/text()``) is not extracted yet.
